library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.2 ✓ dplyr 1.0.6
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Customer RFM dataset: a CSV hosted on GitHub.
# Point `fcsv` at your own file path or URL to analyse a different customer base.
fcsv <- "https://raw.githubusercontent.com/multidis/hult-retail-analytics/main/customer_segmentation/datasets/customers_30.csv"
# No column types are declared, so readr infers them and reports its guess.
cust_rfm <- readr::read_csv(file = fcsv)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## CustomerID = col_double(),
## recency = col_double(),
## frequency = col_double(),
## monetary = col_double()
## )
# Auto-print the tibble to preview its dimensions, column types and first rows.
cust_rfm
## # A tibble: 3,000 x 4
## CustomerID recency frequency monetary
## <dbl> <dbl> <dbl> <dbl>
## 1 14953 25 1 286.
## 2 16809 23 5 915.
## 3 18112 12 2 353.
## 4 13587 63 1 712.
## 5 14889 336 1 136.
## 6 14966 14 1 183.
## 7 13029 30 3 1248.
## 8 13650 16 6 1836.
## 9 17078 36 2 378.
## 10 13522 30 1 126.
## # … with 2,990 more rows
# Explore the customer metrics in the dataset, starting with `frequency`.
# Histogram built with ggplot() + geom_histogram() rather than qplot(),
# which is deprecated as of ggplot2 3.4.0; the resulting plot is the same.
ggplot(cust_rfm, aes(x = frequency)) +
  geom_histogram(binwidth = 5)

# 70th and 80th percentiles of frequency, useful as segmentation cut-offs.
quantile(cust_rfm$frequency, 0.7)
## 70%
## 4
quantile(cust_rfm$frequency, 0.8)
## 80%
## 6
# Histogram of `recency` in bins of width 10.
# ggplot() + geom_histogram() replaces qplot(), deprecated as of ggplot2 3.4.0.
ggplot(cust_rfm, aes(x = recency)) +
  geom_histogram(binwidth = 10)

# 30th percentile of recency.
quantile(cust_rfm$recency, 0.3)
## 30%
## 21
# Histogram of `monetary` over its full range, in bins of width 1000.
# ggplot() + geom_histogram() replaces qplot(), deprecated as of ggplot2 3.4.0.
ggplot(cust_rfm, aes(x = monetary)) +
  geom_histogram(binwidth = 1000)

# Zoomed view with finer bins: xlim() drops rows outside 0-20000, which is
# what triggers the "Removed ... rows" warnings recorded below.
ggplot(cust_rfm, aes(x = monetary)) +
  geom_histogram(binwidth = 500) +
  xlim(0, 20000)
## Warning: Removed 31 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).

# 70th percentile of monetary.
quantile(cust_rfm$monetary, 0.7)
## 70%
## 1315.188
# Interactive scatter for spotting the most valuable customers:
# frequency on x, monetary on y, marker colour mapped to recency.
# Hovering a point shows only its CustomerID.
cust_rfm %>%
  plot_ly(
    x = ~frequency,
    y = ~monetary,
    color = ~recency,
    hoverinfo = "text",
    text = ~CustomerID
  ) %>%
  add_markers()
# Example only -- edit the segment definition as needed.
# Mean recency of the customers that lie above the 70th percentile of the
# full dataset on both frequency and monetary.
cust_rfm %>%
  filter(
    frequency > quantile(cust_rfm$frequency, 0.7),
    monetary > quantile(cust_rfm$monetary, 0.7)
  ) %>%
  summarise(rec = mean(recency))
## # A tibble: 1 x 1
## rec
## <dbl>
## 1 25.7